Affine ================= 将来自输入数据的若干行按照一个“context”(偏移集合)拼接/裁切成一个中间矩阵,再对该中间矩阵与权重做一次矩阵乘和偏置加法操作,最后应用激活函数(如果指定),得到输出结果。 该算子支持全量运行和增量运行两种模式,并维护了上一次全窗口的输出(previous\_output)以支持增量更新。 输入: - **input0** - 输入数据张量地址。 - **input1** - 权重矩阵地址。 - **input2** - 偏置向量地址。 - **input0_shape** - 输入数据形状数组,长度为3,第一维值为1。 - **input1_shape** - 权重矩阵形状数组,长度为3,第一维值为1。 - **input2_shape** - 偏置向量形状数组,长度为3,第一维值为1。 - **output_shape** - 输出张量形状数组,长度为3,第一维值为1。 - **context** - 上下文索引数组,值递增。 - **context_size** - 上下文大小。 - **output_dim** - 拼接后中间矩阵的最后一维大小(并非输出张量的最后一维),即输入张量最后一维大小乘以上下文大小。 - **activation_type** - 激活函数类型,0-8。 - **is_full_run** - 全量运行标志指针,所指值为1时为全量更新(运行后修改为0),为0时为增量更新。 - **full_input** - 全量输入缓冲区地址。 - **full_input_shape** - 全量输入形状数组。 - **increment_input** - 增量输入缓冲区地址。 - **increment_input_shape** - 增量输入形状数组。 - **increment_output** - 增量输出缓冲区地址。 - **increment_output_shape** - 增量输出形状数组。 - **previous_output** - 先前输出缓冲区地址。 - **previous_output_shape** - 先前输出形状数组。 - **core_mask(int, 可选)** - 核掩码(仅适用于共享存储版本)。 输出: - **output** - 仿射变换结果张量地址。 支持平台: ``FT78NE`` ``MT7004`` .. note:: - FT78NE 支持的数据类型:int8, fp32 - MT7004 支持的数据类型:fp16, fp32 **激活函数类型定义:** ..
code-block:: c :linenos: #define ActivationType_NO_ACTIVATION 0 // 无激活函数 #define ActivationType_RELU 1 // ReLU激活函数 #define ActivationType_RELU6 2 // ReLU6激活函数 #define ActivationType_SIGMOID 3 // Sigmoid激活函数 #define ActivationType_TANH 4 // Tanh激活函数 #define ActivationType_SWISH 5 // Swish激活函数 #define ActivationType_HSWISH 6 // Hard Swish激活函数 #define ActivationType_HSIGMOID 7 // Hard Sigmoid激活函数 #define ActivationType_SOFTPLUS 8 // Softplus激活函数 **激活函数数学公式:** - **ReLU**: :math:`f(x) = \max(0, x)` - **ReLU6**: :math:`f(x) = \min(\max(0, x), 6)` - **Sigmoid**: :math:`f(x) = \frac{1}{1 + e^{-x}}` - **Tanh**: :math:`f(x) = \frac{e^x - e^{-x}}{e^x + e^{-x}}` - **Swish**: :math:`f(x) = x \cdot \sigma(x) = \frac{x}{1 + e^{-x}}` - **Hard Swish**: :math:`f(x) = x \cdot \frac{\min(\max(x + 3, 0), 6)}{6}` - **Hard Sigmoid**: :math:`f(x) = \frac{\min(\max(x + 3, 0), 6)}{6}` - **Softplus**: :math:`f(x) = \ln(1 + e^x)` **参数数组结构:** .. code-block:: c :linenos: long long params[21]; params[0] = (long long)input0; // 输入数据张量地址 params[1] = (long long)input1; // 权重矩阵地址 params[2] = (long long)input2; // 偏置向量地址 params[3] = (long long)output; // 输出张量地址 params[4] = (long long)input0_shape; // 输入数据形状数组 params[5] = (long long)input1_shape; // 权重矩阵形状数组 params[6] = (long long)input2_shape; // 偏置向量形状数组 params[7] = (long long)output_shape; // 输出张量形状数组 params[8] = (long long)context; // 上下文索引数组 params[9] = (long long)context_size; // 上下文大小 params[10] = (long long)output_dim; // 输出维度 params[11] = (long long)activation_type; // 激活函数类型 params[12] = (long long)&is_full_run; // 全量运行标志指针 params[13] = (long long)full_input; // 全量输入缓冲区地址 params[14] = (long long)full_input_shape; // 全量输入形状数组 params[15] = (long long)increment_input; // 增量输入缓冲区地址 params[16] = (long long)increment_input_shape; // 增量输入形状数组 params[17] = (long long)increment_output; // 增量输出缓冲区地址 params[18] = (long long)increment_output_shape; // 增量输出形状数组 params[19] = (long long)previous_output; // 先前输出缓冲区地址 params[20] = (long
long)previous_output_shape; // 先前输出形状数组 **共享存储版本:** .. c:function:: void i8_affine_s(long long* params, int core_mask) .. c:function:: void fp_affine_s(long long* params, int core_mask) .. c:function:: void hp_affine_s(long long* params, int core_mask) **C调用示例:** .. code-block:: c :linenos: :emphasize-lines: 87 // FT78NE 多核示例 #include #include #include #include void test_fp_affine_s(int a, int b, int c, int o, int activation_type, int full_run, int core_mask) { int i = 0, j = 0; srand(time(0)); int core_id = DNUM; int logic_core_id = GetLogicCoreId(core_mask, core_id); int num = GetCoreNum(core_mask); int is_full_run = full_run; int context[] = {-1, 0, 1, 2}; int context_size = c; int output_dim = b * c; // 形状定义 int input0_shape[3] = {1, a, b}; int input1_shape[3] = {1, b * c, o}; int input2_shape[3] = {1, a - c + 1, o}; int output_shape[3] = {1, a - c + 1, o}; // 中间缓冲区形状 int full_input_shape[3] = {1, input0_shape[1] - (context[context_size - 1] - context[0]), output_dim}; int increment_input_shape[3] = {1, 1, output_dim}; int increment_output_shape[3] = {1, 1, output_shape[2]}; int previous_output_shape[3] = {1, output_shape[1], output_shape[2]}; // 内存分配 float* input0 = (float*)(0xA0400000); float* input1 = (float*)(0xA0400000 + 0x100000); float* input2 = (float*)(0xA0400000 + 0x200000); float* output = (float*)(0xA0400000 + 0x300000); float* full_input = (float*)(0xA0400000 + 0x400000); float* increment_input = (float*)(0xA0400000 + 0x500000); float* increment_output = (float*)(0xA0400000 + 0x600000); float* previous_output = (float*)(0xA0400000 + 0x700000); // 初始化数据 if (logic_core_id == 0) { int input0_len = input0_shape[0] * input0_shape[1] * input0_shape[2]; int input1_len = input1_shape[0] * input1_shape[1] * input1_shape[2]; int input2_len = input2_shape[0] * input2_shape[1] * input2_shape[2]; for (i = 0; i < input0_len; i++) { input0[i] = ((float)rand() / RAND_MAX) * 2 - 1; } for (i = 0; i < input1_len; i++) { input1[i] = ((float)rand() / RAND_MAX) * 2 - 1; 
} for (i = 0; i < input2_shape[2]; i++) { input2[i] = ((float)rand() / RAND_MAX) * 2 - 1; for (j = 1; j < input2_shape[1]; j++) { input2[i + j * input2_shape[2]] = input2[i]; } } } // 准备参数数组 long long params[21]; params[0] = (long long)input0; params[1] = (long long)input1; params[2] = (long long)input2; params[3] = (long long)output; params[4] = (long long)input0_shape; params[5] = (long long)input1_shape; params[6] = (long long)input2_shape; params[7] = (long long)output_shape; params[8] = (long long)context; params[9] = (long long)context_size; params[10] = (long long)output_dim; params[11] = (long long)activation_type; params[12] = (long long)&is_full_run; params[13] = (long long)full_input; params[14] = (long long)full_input_shape; params[15] = (long long)increment_input; params[16] = (long long)increment_input_shape; params[17] = (long long)increment_output; params[18] = (long long)increment_output_shape; params[19] = (long long)previous_output; params[20] = (long long)previous_output_shape; // 执行 Affine 操作 fp_affine_s(params, core_mask); } int main(void) { int a = 23, b = 31, c = 4, o = 29; int activation_type = 0; // 激活函数类型 int full_run = 1; // 全量运行标志 int core_mask = 0xff; // 核掩码 test_fp_affine_s(a, b, c, o, activation_type, full_run, core_mask); return 0; } **私有存储版本:** .. c:function:: void i8_affine_p(long long* params) .. c:function:: void fp_affine_p(long long* params) .. c:function:: void hp_affine_p(long long* params) **C调用示例:** .. 
code-block:: c :linenos: :emphasize-lines: 57 // FT78NE 单核示例 #include #include int main(void) { // 参数设置(与共享版本类似) int a = 32, b = 16, c = 4, o = 16; int activation_type = 0, is_full_run = 1; int context[] = {-1, 0, 1, 2}; int context_size = c; int output_dim = b * c; int input0_shape[3] = {1, a, b}; int input1_shape[3] = {1, b * c, o}; int input2_shape[3] = {1, a - c + 1, o}; int output_shape[3] = {1, a - c + 1, o}; int full_input_shape[3] = {1, input0_shape[1] - (context[context_size - 1] - context[0]), output_dim}; int increment_input_shape[3] = {1, 1, output_dim}; int increment_output_shape[3] = {1, 1, output_shape[2]}; int previous_output_shape[3] = {1, output_shape[1], output_shape[2]}; float* input0 = (float*)(0x10810000); float* input1 = (float*)(0x10810000 + 0x100000); float* input2 = (float*)(0x10810000 + 0x200000); float* output = (float*)(0x10810000 + 0x300000); float* full_input = (float*)(0x10810000 + 0x400000); float* increment_input = (float*)(0x10810000 + 0x500000); float* increment_output = (float*)(0x10810000 + 0x600000); float* previous_output = (float*)(0x10810000 + 0x700000); // 准备参数数组(与共享版本相同) long long params[21]; params[0] = (long long)input0; params[1] = (long long)input1; params[2] = (long long)input2; params[3] = (long long)output; params[4] = (long long)input0_shape; params[5] = (long long)input1_shape; params[6] = (long long)input2_shape; params[7] = (long long)output_shape; params[8] = (long long)context; params[9] = (long long)context_size; params[10] = (long long)output_dim; params[11] = (long long)activation_type; params[12] = (long long)&is_full_run; params[13] = (long long)full_input; params[14] = (long long)full_input_shape; params[15] = (long long)increment_input; params[16] = (long long)increment_input_shape; params[17] = (long long)increment_output; params[18] = (long long)increment_output_shape; params[19] = (long long)previous_output; params[20] = (long long)previous_output_shape; // 调用 Affine fp_affine_p(params); return 0; }